library("CostTrajectory")
## Warning: package 'survival' was built under R version 3.6.3
## Warning: package 'foreach' was built under R version 3.6.3
## Warning: package 'plot3D' was built under R version 3.6.3
In this document, I provide a simulation example for the CostTrajectory package and with beautiful plot examples for the paper. For more details on the methods behind, please check on:
citation("CostTrajectory")
This example takes the documentation from CostTrajectory() and shows the results when censored data is included in the estimation procedure. We simulate data and then fit them:
data <- CostTrajectory_simulate_data(n=50)
head(data)
## id surv death time status Y
## 1 1 9 112 1 0 13.073536
## 2 1 9 112 2 0 11.285352
## 3 1 9 112 3 0 11.000561
## 4 1 9 112 4 0 9.900468
## 5 1 9 112 5 0 8.961636
## 6 1 9 112 6 0 8.993106
fit <- CostTrajectory(time=data$time, surv=data$surv, cost=data$Y, id=data$id, status=data$status)
## Warning in fit_cost(B, theta.old, z, id, varfunc, usecensor, idx.sts, idx.lts, :
## parameter estimates did NOT converge in 20 iterations. Increase MAX.IT in
## control.
## Warning in fit_cost(B, theta.old, z, id, varfunc, usecensor, idx.sts, idx.lts, :
## parameter estimates did NOT converge in 20 iterations. Increase MAX.IT in
## control.
Then we check the plots and summary by looking at
plot(fit)
summary(fit)
##
## Number of Observations : 50
## of which maximum follow-up time tau: 100
## long-term survivors (LTS): 7 ( 14 % )
## censored prior to tau: 35 ( 70 % )
## (Selected) smoothing parameters
## over time-axis: 1e-05
## over surv-axis: 1e-05
## over LTS -axis: 1e-05
## (Estimated) correlation parameter (alpha):
## over uncensored pateints: 0.297
## over LTS: 0.0931
##
## Residuals:
## Min 1Q Median 3Q Maobject
## -5.3359 -1.0032 -0.2269 0.4880 6.2634
##
## Settings and control:
## each axis: 7 - differences of order 2
## convergence tolerance : 1e-04
## generalized cross validation (GCV) : 1351.6
Population mean results are also of interest for descripive analysis. We use a function CostTrajectory_PopulaionMean() to produce pointwise average estimates based solely on uncensored data.
fit.popmean <- CostTrajectory_PopulationMean(data)
tau <- max(data$surv)
Z0 <- matrix(NA,tau+2,tau)
for(i in 1:tau){
for(j in 1:i){
Z0[i,j] <- fit.popmean$Y[fit.popmean$death == i & fit.popmean$time == j]
}
}
for(j in 1:tau)
Z0[tau+2,j] <- fit.popmean$Y[fit.popmean$death == tau+1 & fit.popmean$time == j]
par(mfrow = c(1,2))
image2D(x=1:tau,y=1:(tau+2), t(Z0), main = 'Population mean heatmap',
xlab = 'time',ylab = 'survival')
persp3D(y=1:tau,x=1:(tau+2), z = Z0, main = 'Population mean surface',
ylab = 'time',xlab = 'survival', zlab='Cost',
clab = "Cost",theta=30,phi=25)
These plots are unsmooth and curvy, because our sample size is small (n=50), and in some survival times there is no data. Therefore CostTrajectory() is necessary to improve the interpretability of this kind of cost data.
Thought it would be more efficient and elegant to is provide interactive plots, here we show how to do it. First we load packages and preprocess the results.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
library(wesanderson)
## Warning: package 'wesanderson' was built under R version 3.6.3
library(scales)
## Warning: package 'scales' was built under R version 3.6.3
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.6.3
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.3
library(rayshader)
## Warning: package 'rayshader' was built under R version 3.6.3
pal <- wes_palette("Zissou1", 10, type = "continuous")
ndx <- 5; deg <- 2
tau <- fit$tau
x <- 1:tau; y <- 1:tau
Bi <- fit$test$B
theta1 <- matrix(fit$coefficent$theta,ncol=1)
theta2 <- matrix(fit$coefficent.init$theta,ncol=1)
dat.test <- fit$test$data
s.lst <- c(c(2,4,6,8)*10,101)
vv1 = fit$coefficent$variance$variance
vv2 = fit$coefficent.init$variance$variance
Bi1 <- cbind(Bi, matrix(0,nrow(Bi),ndx+deg))
se1 = sqrt(diag(Bi1 %*% vv1 %*% t(Bi1)))
se2 = sqrt(diag(Bi1 %*% vv2 %*% t(Bi1)))
yhat1 <- as.vector(Bi %*% theta1)
yhat2 <- as.vector(Bi %*% theta2)
pal <- wes_palette("Zissou1", 10, type = "continuous")
dat.test$Y1 <- as.array(yhat1)
dat.test$Y2 <- as.array(yhat2)
idx <- which(dat.test$death %in% s.lst)
pt1d <- data.frame(dat.test, group = paste0('Fitted', dat.test$id),
Stage = rep('Fitted',nrow(dat.test)),
YY=yhat1,Ylb = yhat1-1.96*se1,Yub = yhat1+1.96*se1)
pt2d <- data.frame(dat.test, group = paste0('Initial', dat.test$id),
Stage = rep('Initial',nrow(dat.test)),
YY=yhat2,Ylb = yhat2-1.96*se1,Yub = yhat1+1.96*se2)
We plot the fitted curves on selected survival times, and compared two groups (Fitted vs Initial). “Fitted” uses censored data while “Initial” does not.
ptd <- rbind(pt1d[idx,],pt2d[idx,])
ptd$id[ptd$id==101] <- '100+'
ptd$id <- factor(ptd$id,levels=c("20","40","60","80","100+" ))
ptd$Method <- 'Estimate'
xx <- ptd[,c('death','time','YY','Stage','Ylb','Yub','id')]
p=ggplot(xx)+geom_line(aes(time,YY,group=Stage,color=Stage,linetype=Stage))+
geom_ribbon(data=subset(xx, 0 <= xx$time & xx$time <= tau),
aes(x=time,xmin=0,xmax=tau,ymin=Ylb,ymax=Yub,
group=Stage,fill=Stage), alpha=0.3) +
scale_fill_manual(values = c("blue", "red", 'green')) +ylim(0,15)+
xlab('Time')+ylab('Cost')+
scale_x_continuous(breaks = seq(0,tau, by = 10)) +
scale_color_manual(values = c("blue", "red"))+
theme(panel.background = element_rect(fill='white',color='black'),
panel.grid.major = element_line(colour = "lightgrey",size=.2),
legend.position = "none",
legend.box = "vertical",
plot.title = element_text(hjust = 0.5)) +
facet_grid(rows = vars(id))
ggplotly(p)
The blue and red curves are close in each survival group, suggesting our method handles the censored cost data well. For real world applications, this plot can be used for sensitivity analysis for informative censoring. Slightly change of input can perform subgroup comparison, for example, early stage cancer costs versus late stage cancer costs.
Next we shows the heatmap and 3D surface.
ptd <- rbind(pt1d,pt2d)
ptd$id <- as.factor(ptd$id)
yy <- ptd[ptd$death>tau,]
ptd <- ptd[ptd$death<=tau,]
yy$death <- tau+5
yy2 <- yy
for(i in (tau+6:15)){
yy1 <- yy; yy1$death <- i
yy2 <- rbind(yy2,yy1)
}
ptd1 <- rbind(ptd, yy2)
p <- ggplot(ptd1,aes(y=death,x=time))+
geom_raster(aes(fill=YY)) +
geom_contour(aes(z=Y),color='black',breaks=c(3,6,9))+
xlab('Time')+ylab('Survival ')+
scale_fill_gradientn(colours = pal, limits=c(0,18),name='Cost\n',
values = rescale(c(0,8,10,18),
from = c(0,18)))+
theme(panel.background = element_rect(fill='white',color='black'),
plot.title = element_text(hjust = 0.5), legend.key.height = unit(1, "cm")
)+
scale_x_continuous(breaks = seq(0,tau, by = 10)) +
scale_y_continuous(breaks = seq(0,tau, by = 10)) +
facet_grid(cols = vars(Stage))
ggplotly(p)
## Warning: Raster pixels are placed at uneven vertical intervals and will be
## shifted. Consider using geom_tile() instead.
y = 1:(tau+15); x=1:tau
z <- matrix(NA, tau,tau+15)
for(i in 1:tau) z[(1:i),i] <- ptd1$YY[ptd1$group == paste0('Fitted',i)]#Local/Regional
for(i in (6:15)+tau) z[,i] <- ptd1$YY[(ptd1$group == paste0('Fitted',tau+1)) & (ptd1$death==i)]
p <- plot_ly(x=y, y=x, z=z,colors = colorRamp(pal,bias=2,alpha=0.7),
type = 'surface',
contours = list(
x = list(show = TRUE, start = 20, end = tau, size = 20, color = 'white'),
y = list(show = TRUE, start = 20, end = tau, size = 20, color = 'white'),
z = list(show = TRUE, start = 3, end = 15, size = 1, color = 'white'))) %>% layout(
scene = list(
camera=list(
eye = list(x=2, y=1, z=.5)
)
)
) %>% layout(scene = list(yaxis = list(title='Time',
tickvals = seq(20,tau,20)),
xaxis = list(title='Survrival',
tickvals = seq(20,tau,20)),
zaxis = list(title='Cost',
tickvals = seq(0,15,5))))
p